---
title: "Summary of HEI PM10 Report Analysis"
author: "Cory Zigler"
date: "April 22, 2015"
output: pdf_document
---


```{r, echo=FALSE}
## The following script will reproduce the figures, tables, and numbers reported in the report
## Note that any number involving raw Medicare will be different, since fake simulated Medicare data are distributed with these materials
rm(list=ls())
set.seed(51)
library(HEIfunctions)
library(maps)
library(PSAgraphics)

datadir = '~/Dropbox/Research/HEI Accountability Grant/Final Report/Revised Report/Reproducibility Files for Dataverse/Case Study 1_PM10 Nonattainment/Data Construction/'

source('mybalcheck.R')
source('SummarizeRegressionFunctions.R')
source('ReadSimsFunction.R')
getrate=function(vec, denom){
  return(vec*1000/denom)
}

#########   Say which results you will use for the final analysis   #####
# Select which measure to use for "baseline" pollution
basename = "pm10base1990"
basenameorig = "baseorig1990"

# Select which measure to use for "follow up" pollution
funame = "pm10fu"

whichresults = "Principal Stratification Analysis/"

whichresults_noprune = "Principal Stratification Analysis without Pruning/"

Cede= 5
Cede2 = 1.2
Ceae = -5
Ceae2 = -1.2
whichresults_pollution = "Pollution Only Analysis/"

whichresults_pollution_noprune = "Pollution Only Analysis without Pruning/"
pollutiontransform = "log"

nburn = 5000
thin = 10
nsamp = 32000

#### Read in the Data ######
load(file=paste(datadir, "pm10analysis.RData", sep=""))

datorig = dat

dat$pm10base = dat[, basename]
dat$baseorig = dat[, basenameorig]
dat$pm10fu = dat[, funame]

#### Select which variables are going to be included in the various models ######
vars = c("Median_income", "HS_rate", "Urban_rate", "Migration_5_year_rate", "Hispanic_cens", "White_cens", "Black_cens",
         "Current_Smoking_rate_weighted", "Female_cens", "mean_age.2001", "Female_rate.2001", "White_rate.2001", "Black_rate.2001",
         "Tot_Beneficiary.2001", "pm10base", "housedens", "temperature", "logpop")

#### Select other variables that you have to keep track of throughout the analysis #######
othervars = c("Monitor", "a", "baseorig", "temporig", "pm10fu", "POPULATION",
              "Total_death.2001", "ALL_CVD.2001", "Respiratory.2001", "pyear.2001", "Longitude", "Latitude")

dat=dat[, names(dat) %in% c(othervars, vars)]
(length(vars) + length(othervars)) == (dim(dat)[[2]])
# which(!(vars %in% names(dat)))

###### Create "Table 1" datatab ######
tabvars = c("pm10base", ## Monitor Level
         "Tot_Beneficiary.2001","mean_age.2001", "Female_rate.2001", "White_rate.2001", "Black_rate.2001", ## Medicare Level
         "POPULATION", "housedens", "Urban_rate", "Median_income", "HS_rate",  "Migration_5_year_rate",
         "Current_Smoking_rate_weighted", "temperature","Hispanic_cens", "White_cens", "Black_cens","Female_cens")  ## County level
varnames=c("Ambient PM10 1990$^*$",  ### Monitor-Level
           "Medicare Beneficiaries$^*$", "Age$^*$", "\\% Female$^*$", "\\% White$^*$", "\\% Black$^*$", ### Medicare (monitor-level)
           "Population$^*$", "Housing Density$^*$", "\\% Urban Living$^*$", "Median Income$^*$", "\\% High School Graduates$^*$", "5-Year Migration Rate$^*$", "Smoking Rate$^*$", "Annual Maximum Temperature$^*$",
           "\\% Hispanic", "\\% White", "\\% Black", "\\% Female")  ## $^*$ for in PS, $^**$ for in outcome too

dat$mortrate = dat$Total_death.2001 * 1000 / dat$Tot_Beneficiary.2001
dat$CVDrate = dat$ALL_CVD.2001 * 1000 / dat$pyear.2001
dat$Resprate = dat$Respiratory.2001 * 1000 / dat$pyear.2001

outvars = c("pm10fu", "mortrate", "CVDrate", "Resprate")
outnames = c("Ambient PM10 1999-2001", "Mortality Rate", "Hospitalization Rate: CVD", "Hospitalization Rate: Respiratory")

allvars = c(tabvars, outvars)
allnames = c(varnames, outnames)
vartab = matrix(NA, nrow=length(allvars), ncol=4)
dimnames(vartab)[[1]] = allnames
dimnames(vartab)[[2]] = rep(c("Mean", "SD"), 2)
for (i in 1:length(allvars) ){
  vartab[i,c(1,3)] = with(dat, tapply(dat[,allvars[i]], a, mean, na.rm=TRUE))
  vartab[i,c(2,4)] = with(dat, tapply(dat[,allvars[i]], a, sd, na.rm=TRUE))
  #  vartab[i,3] = t.test(dat[, vars[i]]~dat[, "a"])$p.value
}

## convert decimals to percentages
vartab[c(4,5,6,9,11,12,13,15,16,17,18),] = 100*vartab[c(4,5,6,9,11,12,13,15,16,17,18),]
write.table(round(vartab,2), file ="vartab.txt", sep="&", eol="\\\\ \n", row.names=TRUE, col.names=FALSE, quote=FALSE)
```

# Propensity Score Analysis
```{r, echo=FALSE}
###############################################################
#####     Estimate the Propensity Score      ##################
###############################################################

psmod = glm(a~. + pm10base*temperature, family=binomial(link="logit"), data=dat[, (names(dat) %in% c("a", vars) & !(names(dat) %in% c("Monitor", "Female_cens", "Hispanic_cens","White_cens", "Black_cens")))])

dat$ps = psmod$fitted.values
mina1 = min(dat$ps[dat$a==1])
maxa0 = max(dat$ps[dat$a==0])
dat$outofrange = 0

print(mina1)
print(maxa0)

###### Prune based strictly on range of observed PS  ######
 dat$outofrange[dat$ps>maxa0 & dat$a==1]=1
 dat$outofrange[dat$ps<mina1 & dat$a==0]=1


with(dat, table(outofrange))
with(dat, table(a, outofrange))


##### Define PS categories ##################
## Quantiles
nblock   <- 5
breaks	<- as.numeric(quantile(dat$ps[dat$outofrange==0], probs = seq(from=0, to=1, by = 1/nblock)))
dat$pscat	<- as.numeric(cut(dat$ps, breaks= breaks, include.lowest=T))
dat$pscat = as.factor(dat$pscat)

with(dat, table(pscat))
with(dat, table(a,pscat))


######  Propensity Score Histograms  ########
colors = c(rgb(0,0,1,.5), rgb(1,0,0,.5))

```

## Propensity Score Histograms 
```{r, fig.width=4, echo=FALSE}
# pdf(paste(picdir, "pshists_raw.pdf", sep=""))
with(subset(dat, a==0), hist(ps, breaks=50, col=colors[1], main ="", xlab = "Estimated Propensity Scores", xlim=c(0,1)))
with(subset(dat, a==1), hist(ps, breaks=50, col=colors[2], add = TRUE))
legend("topleft", c("Attainment", "Nonattainment"), fill = colors, bty="n")
# dev.off()

 #pdf(paste(picdir, "pshists_pruned.pdf", sep=""))
with(subset(dat, a==0 & outofrange==0), hist(ps, breaks=50, col=colors[1], main = "", xlab = "Estimated Propensity Scores", xlim=c(0,1)))
with(subset(dat, a==1 & outofrange==0), hist(ps, breaks=50, col=colors[2], add = TRUE))
legend("topleft", c("Attainment", "Nonattainment"), fill = colors, bty="n")
 #dev.off()
```


## Plot Covariate Balance 
```{r, fig.width=5, echo=FALSE}
printvars = vars
covnames=c("Income", "High School", "Urban Living", "5 Yr. Migration", "Hispanic (cens)", "% White (cens)", "% Black (cens)",  "Smoking", "% Female (cens)", "Age", 
           "% Female (med)", "% White (med)", "% Black (med)", "Med. Benefic.",  "Baseline PM", "Housing Denisty", "Pop", "Temp")
length(printvars) == length(covnames)


#x11(height=10, width=10)
 #pdf(paste(picdir, "balplot.pdf", sep=""), height=10, width=10)

# Not absolute Effect Size
bal=mybalcheck(covariates=dat[, printvars], treatment=dat$a, propensity=dat$ps, strata=dat$pscat, absolute.es=FALSE, xlim=c(-1,1.5), outofrange=dat$outofrange, universal.psd=FALSE, plot.strata=FALSE, covnames = covnames)

 #dev.off()

##### Keep the data set going..... ########
## Add propensity score variables to the original data so that everything is there
outdat = cbind(dat, dat$ps, dat$pscat, dat$outofrange)
names(outdat) = c(names(dat), "ps", "pscat", "outofrange")
dat = outdat
## if you want to save the data set with the propensity scores
# save(outdat, file = 'pm10_withps.Rda')
```


# Preliminary Analyses

###############################################################
#############     Preliminary Analyses     ####################
###############################################################
### Study Area Maps
```{r,  echo=FALSE}
## Map of all locations
nmap = dim(dat)[[1]]
 #pdf(paste(picdir, '/monitormap_raw.pdf', sep=""))
map("county", xlim=c(-125,-100), ylim=c(25,50))
symbols=c(16,16)
colors=c(rgb(0,0,1,.5), rgb(1,0,0,.5))
with(dat, points(Longitude, Latitude, pch=symbols[a+1],cex=1, col=colors[a+1]))
legend(-124,31.5, pch=symbols,col=colors, c("Attainment", "Nonattainment"), bty="n")
text(-117.8,28.25, paste("n=", nmap, " Monitors", sep=""))
 #dev.off()

## Map of pruned locations
nmap = sum(dat$outofrange==0) 
 #pdf(paste(picdir, '/monitormap_pruned.pdf', sep=""))
map("county", xlim=c(-125,-100), ylim=c(25,50))
symbols=c(16,16)
colors=c(rgb(0,0,1,.5), rgb(1,0,0,.5))
with(subset(dat, outofrange==0), points(Longitude, Latitude, pch=symbols[a+1],cex=1, col=colors[a+1]))
legend(-124,31.5, pch=symbols,col=colors, c("Attainment", "Nonattainment"), bty="n")
text(-117.8,28.25, paste("n=", nmap, " Monitors", sep=""))
 #dev.off()

nmap = sum(dat$outofrange==1) 
 #pdf(paste(picdir, '/monitormap_excluded.pdf', sep=""))
map("county", xlim=c(-125,-100), ylim=c(25,50))
symbols=c(16,16)
colors=c(rgb(0,0,1,.5), rgb(1,0,0,.5))
with(subset(dat, outofrange==1), points(Longitude, Latitude, pch=symbols[a+1],cex=1, col=colors[a+1]))
legend(-124,31.5, pch=symbols,col=colors, c("Attainment", "Nonattainment"), bty="n")
text(-117.8,28.25, paste("n=", nmap, " Monitors", sep=""))
 #dev.off()
```

### Total Number of Medicare Beneficiaries (Total, inrange, outofrange)
```{r,  echo=FALSE}
sum(dat$Tot_Beneficiary.2001)
sum(dat$Tot_Beneficiary.2001[dat$outofrange==0])
sum(dat$Tot_Beneficiary.2001[dat$outofrange==1])
```

### % of Medicare Beneficiaries in Nonattainment Areas
```{r,  echo=FALSE}
with(dat, table(a))

with(subset(dat, a==1), sum(Tot_Beneficiary.2001))
with(subset(dat, a==1), sum(Tot_Beneficiary.2001)) / with(dat, sum(Tot_Beneficiary.2001))
```

### Missing Baseline Pollution
```{r,  echo=FALSE}
with(dat, table(is.na(baseorig)))
with(dat, table(a, is.na(baseorig)))
```
### Missing Follow Up Pollution
```{r,  echo=FALSE}
with(dat, table(is.na(pm10fu)))
with(dat, table(a, is.na(pm10fu)))
```

### Missing Baseline Temperature
```{r,echo=FALSE}
with(dat, table(is.na(temporig)))
with(dat, table(a, is.na(temporig)))
```

## Analysis of Pollution outcomes
### Baseline and Follow up Pollution
```{r, echo=FALSE}
#########################     Analysis of Pollution Outcomes      ######################### 
########## Simple Comparisons ###############
dat$y = log(dat$pm10fu)
obsdat=subset(dat, !is.na(y)) #not pruned
d0 = obsdat
d0$a = 0
d1 = obsdat
d1$a = 1

## Mean comparisons
with(obsdat, tapply(pm10base,a,mean, na.rm=TRUE))
with(obsdat, tapply(exp(y), a, mean))
#with(obsdat, tapply(exp(y)-pm10base,a,mean, na.rm=TRUE))
with(obsdat, t.test(exp(y)-pm10base~a, na.rm=TRUE))

## Regression models including all covariates except the census variables for race and %female
naivereg = lm(y~., data=obsdat[, names(obsdat) %in% c(vars, "y", "a") & !(names(obsdat) %in% c("Female_cens", "Hispanic_cens","White_cens", "Black_cens"))])

summarizereg(naivereg)
summary(naivereg)$coef["a", 4]
```

## Analysis of Health Outcomes
### Simple T-tests of Mortality, CVD, Respiratory Rate
```{r, echo=FALSE}
#########################     Analysis of Health Outcomes      ######################### 
###### Simple Comparisons ######
with(dat, t.test(mortrate~a))
with(dat, t.test(CVDrate~a))
with(dat, t.test(Resprate~a))

##########   Regression Models    ###########
obsdat=dat #not pruned
d0 = obsdat
d0$a=0
d1 = obsdat
d1$a=1

## Regression model for Health
naivereg_mort = glm(obsdat$Total_death.2001~. + offset(log(obsdat$Tot_Beneficiary.2001)), family = poisson(link="log"), data=obsdat[, names(obsdat) %in% c(vars, "a", "h") & !(names(obsdat) %in% c("Tot_Beneficiary.2001", "Female_cens", "Hispanic_cens","White_cens", "Black_cens"))])
naivereg_cvd = glm(obsdat$ALL_CVD.2001~. + offset(log(obsdat$pyear.2001)), family = poisson(link="log"), data=obsdat[, names(obsdat) %in% c(vars, "a", "h") & !(names(obsdat) %in% c("Tot_Beneficiary.2001", "Female_cens", "Hispanic_cens","White_cens", "Black_cens"))])
naivereg_resp = glm(obsdat$Respiratory.2001~. + offset(log(obsdat$pyear.2001)), family = poisson(link="log"), data=obsdat[, names(obsdat) %in% c(vars, "a", "h") & !(names(obsdat) %in% c("Tot_Beneficiary.2001", "Female_cens", "Hispanic_cens","White_cens", "Black_cens"))])
```

### Niave Mortality Analysis
```{r, echo=FALSE}
summarizereg_health(naivereg_mort, "Total_death.2001", dat$Tot_Beneficiary.2001)
exp(naivereg_mort$coef['a'])
summary(naivereg_mort)$coef['a', 4]
```

### Niave CVD Analysis
```{r, echo=FALSE}
summarizereg_health(naivereg_cvd, "ALL_CVD.2001", obsdat$pyear.2001)
exp(naivereg_cvd$coef['a'])
summary(naivereg_cvd)$coef['a', 4]
```

### Niave Respiratory Analysis
```{r, echo=FALSE}
summarizereg_health(naivereg_resp, "Respiratory.2001", obsdat$pyear.2001)
exp(naivereg_resp$coef['a'])
summary(naivereg_resp)$coef['a', 4]
```


# Final Analysis
```{r, echo=FALSE}
###############################################################
#############     Final Analyses     ####################
###############################################################
dat = subset(dat, outofrange==0)

##### Pollution Only Model #######
attyarray_poll = array(NA, c(1, 5, 1))
dimnames(attyarray_poll) = list("", c("Mean", "SD", "2.5%", "97.5%", "P(<0)"), pollutiontransform)

#nburn = 5000
#thin = 10
# Load Results #
load(paste(whichresults_pollution, "pollutionmodel_temp_", pollutiontransform, ".Rda", sep=""))
## if temporary, object is called "out"
mod = out
    
y0out = exp(mod$y0[seq(nburn,nsamp,thin),])
y1out = exp(mod$y1[seq(nburn,nsamp,thin),])

# Calculate ATTs #
ceymat = y1out-y0out
atty = rowMeans(ceymat[, dat$a==1])
attyarray_poll[1,"Mean",1] = mean(atty)
attyarray_poll[1,"SD",1] = sd(atty)
attyarray_poll[1,c("2.5%", "97.5%"),1] = quantile(atty, c(.025, .975))
attyarray_poll[1,"P(<0)",1] = sum(atty<0)/length(atty)

## Check effect for locations with high/low baseline pollution
cutpoint = 45
ceymat_gt = ceymat[, dat$a==1 & dat$pm10base>=cutpoint]
att_gt = rowMeans(ceymat_gt)
mean(att_gt)
quantile(att_gt, c(.025, .975))
with(dat, table(a, pm10base>cutpoint))


########   Health and Pollution Models     ########
healthnames = c("mort", "cvd", "resp")
healthoutcomes = length(healthnames)

attyarray = array(NA, c(healthoutcomes, 5))
dimnames(attyarray) = list(healthnames, c("Mean", "SD", "2.5%", "97.5%", "P(<0)"))
attharray = attyarray
edearray = attharray
eaearray = attharray
nedearray = attharray
neaearray = attharray
edearray2 = attharray
eaearray2 = attharray
nedearray2 = attharray
neaearray2 = attharray

for (hlth in 1:3){
  sims = getsims("log", healthnames[hlth], Cede, Ceae, paste(whichresults, "pstratamod_temp_", healthnames[hlth], ".Rda", sep=""), nsamp)
  atty = sims[["atty"]]
  atth = sims[["atth"]]
  ede  = sims[["ede"]]
  nede = sims[["nede"]]
  eae  = sims[["eae"]]
  neae = sims[["neae"]]
  
  attyarray[hlth,"Mean"] = mean(atty)
  attyarray[hlth,"SD"] = sd(atty)
  attyarray[hlth,c("2.5%", "97.5%")] = quantile(atty, c(.025, .975))
  attyarray[hlth,"P(<0)"] = sum(atty<0)/length(atty)
  
  attharray[hlth,"Mean"] = mean(atth)
  attharray[hlth,"SD"] = sd(atth)
  attharray[hlth,c("2.5%", "97.5%")] = quantile(atth, c(.025, .975))
  attharray[hlth,"P(<0)"] = sum(atth<0)/length(atth)
  
  edearray[hlth,"Mean"] = mean(ede, na.rm=TRUE)
  edearray[hlth,"SD"] = sd(ede, na.rm=TRUE)
  edearray[hlth,c("2.5%", "97.5%")] = quantile(ede, c(.025, .975), na.rm=TRUE)
  edearray[hlth,"P(<0)"] = sum(ede<0, na.rm=TRUE)/length(ede)
  nedearray[hlth, "Mean"] = mean(nede)
  nedearray[hlth, "SD"] = sd(nede)
  nedearray[hlth, "P(<0)"] = sum(nede==0)
  
  eaearray[hlth,"Mean"] = mean(eae, na.rm=TRUE)
  eaearray[hlth,"SD"] = sd(eae, na.rm=TRUE)
  eaearray[hlth,c("2.5%", "97.5%")] = quantile(eae, c(.025, .975), na.rm=TRUE)
  eaearray[hlth,"P(<0)"] = sum(eae<0, na.rm=TRUE)/length(eae)
  neaearray[hlth, "Mean"] = mean(neae)
  neaearray[hlth, "SD"] = sd(neae)
  neaearray[hlth, "P(<0)"] = sum(neae==0)
  
  ## -- And again to calculate EDE and EAE with different values of C
  sims = getsims("log", healthnames[hlth], Cede2, Ceae2, 
                 paste(whichresults, "pstratamod_temp_", healthnames[hlth], ".Rda", sep=""), nsamp)
  ede  = sims[["ede"]]
  nede = sims[["nede"]]
  eae  = sims[["eae"]]
  neae = sims[["neae"]]
  
  edearray2[hlth,"Mean"] = mean(ede, na.rm=TRUE)
  edearray2[hlth,"SD"] = sd(ede, na.rm=TRUE)
  edearray2[hlth,c("2.5%", "97.5%")] = quantile(ede, c(.025, .975), na.rm=TRUE)
  edearray2[hlth,"P(<0)"] = sum(ede<0, na.rm=TRUE)/length(ede)
  nedearray2[hlth, "Mean"] = mean(nede)
  nedearray2[hlth, "SD"] = sd(nede)
  nedearray2[hlth, "P(<0)"] = sum(nede==0)
  
  eaearray2[hlth,"Mean"] = mean(eae, na.rm=TRUE)
  eaearray2[hlth,"SD"] = sd(eae, na.rm=TRUE)
  eaearray2[hlth,c("2.5%", "97.5%")] = quantile(eae, c(.025, .975), na.rm=TRUE)
  eaearray2[hlth,"P(<0)"] = sum(eae<0, na.rm=TRUE)/length(eae)
  neaearray2[hlth, "Mean"] = mean(neae)
  neaearray2[hlth, "SD"] = sd(neae)
  neaearray2[hlth, "P(<0)"] = sum(neae==0)
  
}#hlth


```


##Summary of All Causal Effects
```{r, echo=FALSE}
####     Make results summary table     ######
cetab = matrix(NA, nrow = 4, ncol = 9)
dimnames(cetab) = list(c("Ambient PM10", "All-Cause Mortality", "CVD Hospitalization", "Respiratory Hospitalization"), c("ATTm", "ATT2.5", "ATT97.5", "EDEm", "EDE2.5", "EDE97.5", "EAEm", "EAE2.5", "EAE97.5"))

cetab["Ambient PM10", c("ATTm", "ATT2.5", "ATT97.5")] = attyarray_poll[1, c("Mean", "2.5%", "97.5%"), 1]
cetab["All-Cause Mortality", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["resp", c("Mean", "2.5%", "97.5%")]

cetab["All-Cause Mortality", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["resp", c("Mean", "2.5%", "97.5%")]

cetab["All-Cause Mortality", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["resp", c("Mean", "2.5%", "97.5%")]

round(cetab,2)
write.table(round(cetab,2), file=paste(tabdir, "cetab.txt", sep=""), sep="&", eol="\\\\ \n", row.names=TRUE, col.names=FALSE, quote=FALSE)

### -- And again for different values for EDE and EAE
cetab["All-Cause Mortality", c("EDEm", "EDE2.5", "EDE97.5")] = edearray2["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray2["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray2["resp", c("Mean", "2.5%", "97.5%")]

cetab["All-Cause Mortality", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray2["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray2["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray2["resp", c("Mean", "2.5%", "97.5%")]

round(cetab,2)
write.table(round(cetab,2), file=paste(tabdir, "cetab2.txt", sep=""), sep="&", eol="\\\\ \n", row.names=TRUE, col.names=FALSE, quote=FALSE)


####### Line Plots of 95% Posterior Intervals #############
mort = getsims("log", "mort", Cede, Ceae, paste(whichresults, "pstratamod_temp_", "mort", ".Rda", sep=""), nsamp)
cvd = getsims("log", "cvd", Cede, Ceae, paste(whichresults, "pstratamod_temp_", "cvd", ".Rda", sep=""), nsamp)
resp = getsims("log", "resp", Cede, Ceae, paste(whichresults, "pstratamod_temp_", "resp", ".Rda", sep=""), nsamp)

#pdf(file=paste(picdir, "cehline.pdf", sep=""))
par(mfrow = c(1,1))
niter_mort = length(mort$atth)
niter_resp = length(resp$atth)
niter_cvd = length(cvd$atth)
boxnames = c("", round(mean(mort$nede),1), round(mean(mort$neae),1), "", round(mean(cvd$nede),1), round(mean(cvd$neae),1), "", round(mean(resp$nede),1), round(mean(resp$neae),1))
m = c(mean(mort$atth), mean(mort$ede), mean(mort$eae), 
      mean(cvd$atth), mean(cvd$ede), mean(cvd$eae),
      mean(resp$atth), mean(resp$ede), mean(resp$eae))
p2.5 = c(quantile(mort$atth, .025), quantile(mort$ede, .025),quantile(mort$eae, .025),
         quantile(cvd$atth, .025), quantile(cvd$ede, .025),quantile(cvd$eae, .025),
         quantile(resp$atth, .025), quantile(resp$ede, .025),quantile(resp$eae, .025))
p97.5 = c(quantile(mort$atth, .975), quantile(mort$ede, .975),quantile(mort$eae, .975),
          quantile(cvd$atth, .975), quantile(cvd$ede, .975),quantile(cvd$eae, .975),
          quantile(resp$atth, .975), quantile(resp$ede, .975),quantile(resp$eae, .975))

lims=range(c(m, p2.5, p97.5))
xvals = c(1,2,3, 5,6,7, 9,10,11)
cols = rep(c(1,2,3), 3)
plot(c(1,11), lims, type="n", axes=FALSE, ylab="Causal Effect", xlab="")
points(xvals, m, pch=16, col=cols)
arrows(xvals, p2.5, xvals, p97.5, lwd=2, length=0, col=cols)
lines(c(-1000,1000), c(0,0), lty = 2)
# mtext(at=c(1:3, 5:7, 9:11), boxnames)
# axis(side = 2, at = seq(min(lims), max(lims), length=10), labels = round(seq(min(lims), max(lims), length=10), 1))
axis(side = 2, at = seq(-12, 12, 1), labels = seq(-12, 12, 1))
axis(side = 1, at = c(1,2,3), label = c("ATT", "EDE", "EAE"))    
mtext(side = 1, at=2, line = 2, "All-Cause Mortality")
mtext(side = 1, at=2, line = 3, "Per 1000 Beneficiaries")
axis(side = 1, at = c(5,6,7), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=6, line = 2, "CVD Hospitalization")
mtext(side = 1, at=6, line = 3, "Per 1000 Person-Years")
axis(side = 1, at = c(9,10,11), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=10, line = 2, "Respir. Hospitalization")
mtext(side = 1, at=10, line = 3, "Per 1000 Person-Years")

#dev.off()

#### --- And again for different calculations of EDE and EAE
mort = getsims("log", "mort", Cede2, Ceae2, paste(whichresults, "pstratamod_temp_", "mort", ".Rda", sep=""), nsamp)
cvd = getsims("log", "cvd", Cede2, Ceae2, paste(whichresults, "pstratamod_temp_", "cvd", ".Rda", sep=""), nsamp)
resp = getsims("log", "resp", Cede2, Ceae2, paste(whichresults, "pstratamod_temp_", "resp", ".Rda", sep=""), nsamp)

#pdf(file=paste(picdir, "cehline2.pdf", sep=""))
par(mfrow = c(1,1))
niter_mort = length(mort$atth)
niter_resp = length(resp$atth)
niter_cvd = length(cvd$atth)
boxnames = c("", round(mean(mort$nede),1), round(mean(mort$neae),1), "", round(mean(cvd$nede),1), round(mean(cvd$neae),1), "", round(mean(resp$nede),1), round(mean(resp$neae),1))
m = c(mean(mort$atth), mean(mort$ede), mean(mort$eae), 
      mean(cvd$atth), mean(cvd$ede), mean(cvd$eae),
      mean(resp$atth), mean(resp$ede), mean(resp$eae))
p2.5 = c(quantile(mort$atth, .025), quantile(mort$ede, .025),quantile(mort$eae, .025),
         quantile(cvd$atth, .025), quantile(cvd$ede, .025),quantile(cvd$eae, .025),
         quantile(resp$atth, .025), quantile(resp$ede, .025),quantile(resp$eae, .025))
p97.5 = c(quantile(mort$atth, .975), quantile(mort$ede, .975),quantile(mort$eae, .975),
          quantile(cvd$atth, .975), quantile(cvd$ede, .975),quantile(cvd$eae, .975),
          quantile(resp$atth, .975), quantile(resp$ede, .975),quantile(resp$eae, .975))

lims=range(c(m, p2.5, p97.5))
xvals = c(1,2,3, 5,6,7, 9,10,11)
cols = rep(c(1,2,3), 3)
plot(c(1,11), lims, type="n", axes=FALSE, ylab="Causal Effect", xlab="")
points(xvals, m, pch=16, col=cols)
arrows(xvals, p2.5, xvals, p97.5, lwd=2, length=0, col=cols)
lines(c(-1000,1000), c(0,0), lty = 2)
# mtext(at=c(1:3, 5:7, 9:11), boxnames)
# axis(side = 2, at = seq(min(lims), max(lims), length=10), labels = round(seq(min(lims), max(lims), length=10), 1))
axis(side = 2, at = seq(-12, 12, 1), labels = seq(-12, 12, 1))
axis(side = 1, at = c(1,2,3), label = c("ATT", "EDE", "EAE"))    
mtext(side = 1, at=2, line = 2, "All-Cause Mortality")
mtext(side = 1, at=2, line = 3, "Per 1000 Beneficiaries")
axis(side = 1, at = c(5,6,7), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=6, line = 2, "CVD Hospitalization")
mtext(side = 1, at=6, line = 3, "Per 1000 Person-Years")
axis(side = 1, at = c(9,10,11), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=10, line = 2, "Respir. Hospitalization")
mtext(side = 1, at=10, line = 3, "Per 1000 Person-Years")

#dev.off()

```


# Sensitivity Analysis without propensity score pruning
```{r, echo=FALSE}
###############################################################
#############     No Pruning Analyses     ####################
###############################################################
dat = outdat

##### Pollution Only Model #######
attyarray_poll = array(NA, c(1, 5, 1))
dimnames(attyarray_poll) = list("", c("Mean", "SD", "2.5%", "97.5%", "P(<0)"), pollutiontransform)

#nburn = 5000
#thin = 10
# Load Results #
load(paste(whichresults_pollution_noprune, "pollutionmod_", pollutiontransform, ".Rda", sep=""))

y0out = exp(mod$y0[seq(nburn,nsamp,thin),])
y1out = exp(mod$y1[seq(nburn,nsamp,thin),])

# Calculate ATTs #
ceymat = y1out-y0out
atty = rowMeans(ceymat[, dat$a==1])
attyarray_poll[1,"Mean",1] = mean(atty)
attyarray_poll[1,"SD",1] = sd(atty)
attyarray_poll[1,c("2.5%", "97.5%"),1] = quantile(atty, c(.025, .975))
attyarray_poll[1,"P(<0)",1] = sum(atty<0)/length(atty)


########   Health and Pollution Models     ########
healthnames = c("mort", "cvd", "resp")
healthoutcomes = length(healthnames)

attyarray = array(NA, c(healthoutcomes, 5))
dimnames(attyarray) = list(healthnames, c("Mean", "SD", "2.5%", "97.5%", "P(<0)"))
attharray = attyarray
edearray = attharray
eaearray = attharray
nedearray = attharray
neaearray = attharray


nsamp = 28500
for (hlth in 1:3){
  sims = getsims("log", healthnames[hlth], Cede, Ceae, 
                 paste(whichresults_noprune, "pstratamod_temp_", healthnames[hlth], ".Rda", sep=""), nsamp)
  atty = sims[["atty"]]
  atth = sims[["atth"]]
  ede  = sims[["ede"]]
  nede = sims[["nede"]]
  eae  = sims[["eae"]]
  neae = sims[["neae"]]
  
  attyarray[hlth,"Mean"] = mean(atty)
  attyarray[hlth,"SD"] = sd(atty)
  attyarray[hlth,c("2.5%", "97.5%")] = quantile(atty, c(.025, .975))
  attyarray[hlth,"P(<0)"] = sum(atty<0)/length(atty)
  
  attharray[hlth,"Mean"] = mean(atth)
  attharray[hlth,"SD"] = sd(atth)
  attharray[hlth,c("2.5%", "97.5%")] = quantile(atth, c(.025, .975))
  attharray[hlth,"P(<0)"] = sum(atth<0)/length(atth)
  
  edearray[hlth,"Mean"] = mean(ede, na.rm=TRUE)
  edearray[hlth,"SD"] = sd(ede, na.rm=TRUE)
  edearray[hlth,c("2.5%", "97.5%")] = quantile(ede, c(.025, .975), na.rm=TRUE)
  edearray[hlth,"P(<0)"] = sum(ede<0, na.rm=TRUE)/length(ede)
  nedearray[hlth, "Mean"] = mean(nede)
  nedearray[hlth, "SD"] = sd(nede)
  nedearray[hlth, "P(<0)"] = sum(nede==0)
  
  eaearray[hlth,"Mean"] = mean(eae, na.rm=TRUE)
  eaearray[hlth,"SD"] = sd(eae, na.rm=TRUE)
  eaearray[hlth,c("2.5%", "97.5%")] = quantile(eae, c(.025, .975), na.rm=TRUE)
  eaearray[hlth,"P(<0)"] = sum(eae<0, na.rm=TRUE)/length(eae)
  neaearray[hlth, "Mean"] = mean(neae)
  neaearray[hlth, "SD"] = sd(neae)
  neaearray[hlth, "P(<0)"] = sum(neae==0)
  
}#hlth


```


##Summary of All Causal Effects without pruning
```{r, echo=FALSE}
####     Make results summary table     ######
cetab = matrix(NA, nrow = 4, ncol = 9)
dimnames(cetab) = list(c("Ambient PM10", "All-Cause Mortality", "CVD Hospitalization", "Respiratory Hospitalization"), c("ATTm", "ATT2.5", "ATT97.5", "EDEm", "EDE2.5", "EDE97.5", "EAEm", "EAE2.5", "EAE97.5"))

cetab["Ambient PM10", c("ATTm", "ATT2.5", "ATT97.5")] = attyarray_poll[1, c("Mean", "2.5%", "97.5%"), 1]
cetab["All-Cause Mortality", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("ATTm", "ATT2.5", "ATT97.5")] = attharray["resp", c("Mean", "2.5%", "97.5%")]

cetab["All-Cause Mortality", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EDEm", "EDE2.5", "EDE97.5")] = edearray["resp", c("Mean", "2.5%", "97.5%")]

cetab["All-Cause Mortality", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["mort", c("Mean", "2.5%", "97.5%")]
cetab["CVD Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["cvd", c("Mean", "2.5%", "97.5%")]
cetab["Respiratory Hospitalization", c("EAEm", "EAE2.5", "EAE97.5")] = eaearray["resp", c("Mean", "2.5%", "97.5%")]

round(cetab,2)
write.table(round(cetab,2), file=paste(tabdir, "cetab_noprune.txt", sep=""), sep="&", eol="\\\\ \n", row.names=TRUE, col.names=FALSE, quote=FALSE)


####### Line Plots of 95% Posterior Intervals #############
mort = getsims("log", "mort", Cede, Ceae, paste(whichresults_noprune, "pstratamod_temp_", "mort", ".Rda", sep=""), nsamp)
cvd = getsims("log", "cvd", Cede, Ceae, paste(whichresults_noprune, "pstratamod_temp_", "cvd", ".Rda", sep=""), nsamp)
resp = getsims("log", "resp", Cede, Ceae, paste(whichresults_noprune, "pstratamod_temp_", "resp", ".Rda", sep=""), nsamp)

# pdf(file=paste(picdir, "cehline_noprune.pdf", sep=""))
par(mfrow = c(1,1))
niter_mort = length(mort$atth)
niter_resp = length(resp$atth)
niter_cvd = length(cvd$atth)
boxnames = c("", round(mean(mort$nede),1), round(mean(mort$neae),1), "", round(mean(cvd$nede),1), round(mean(cvd$neae),1), "", round(mean(resp$nede),1), round(mean(resp$neae),1))
m = c(mean(mort$atth), mean(mort$ede), mean(mort$eae), 
      mean(cvd$atth), mean(cvd$ede), mean(cvd$eae),
      mean(resp$atth), mean(resp$ede), mean(resp$eae))
p2.5 = c(quantile(mort$atth, .025), quantile(mort$ede, .025),quantile(mort$eae, .025),
         quantile(cvd$atth, .025), quantile(cvd$ede, .025),quantile(cvd$eae, .025),
         quantile(resp$atth, .025), quantile(resp$ede, .025),quantile(resp$eae, .025))
p97.5 = c(quantile(mort$atth, .975), quantile(mort$ede, .975),quantile(mort$eae, .975),
          quantile(cvd$atth, .975), quantile(cvd$ede, .975),quantile(cvd$eae, .975),
          quantile(resp$atth, .975), quantile(resp$ede, .975),quantile(resp$eae, .975))

lims=range(c(m, p2.5, p97.5))
xvals = c(1,2,3, 5,6,7, 9,10,11)
cols = rep(c(1,2,3), 3)
plot(c(1,11), lims, type="n", axes=FALSE, ylab="Causal Effect", xlab="")
points(xvals, m, pch=16, col=cols)
arrows(xvals, p2.5, xvals, p97.5, lwd=2, length=0, col=cols)
lines(c(-1000,1000), c(0,0), lty = 2)
# mtext(at=c(1:3, 5:7, 9:11), boxnames)
# axis(side = 2, at = seq(min(lims), max(lims), length=10), labels = round(seq(min(lims), max(lims), length=10), 1))
axis(side = 2, at = seq(-12, 12, 1), labels = seq(-12, 12, 1))
axis(side = 1, at = c(1,2,3), label = c("ATT", "EDE", "EAE"))    
mtext(side = 1, at=2, line = 2, "All-Cause Mortality")
mtext(side = 1, at=2, line = 3, "Per 1000 Beneficiaries")
axis(side = 1, at = c(5,6,7), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=6, line = 2, "CVD Hospitalization")
mtext(side = 1, at=6, line = 3, "Per 1000 Person-Years")
axis(side = 1, at = c(9,10,11), label = c("ATT", "EDE", "EAE"))
mtext(side = 1, at=10, line = 2, "Respir. Hospitalization")
mtext(side = 1, at=10, line = 3, "Per 1000 Person-Years")

# dev.off()

```

